library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.1     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the Global Terrorism Database CSV export.
# The location can be overridden with the GTD_CSV_PATH environment variable so
# the script also runs on machines other than the original author's; when the
# variable is unset, the original hard-coded path is used.
gtd_csv_path <- Sys.getenv(
  "GTD_CSV_PATH",
  unset = "/Users/ibhargava/Desktop/globalterrorismdb_0718dist.csv"
)
if (!file.exists(gtd_csv_path)) {
  stop("GTD CSV file not found: ", gtd_csv_path, call. = FALSE)
}
raw_data <- read.csv(gtd_csv_path)

# Working copy for cleaning and feature engineering; raw_data stays as imported.
data2 <- raw_data

# Per-column summary of the raw import.
summary(raw_data)
##     eventid              iyear          imonth            iday      
##  Min.   :1.970e+11   Min.   :1970   Min.   : 0.000   Min.   : 0.00  
##  1st Qu.:1.991e+11   1st Qu.:1991   1st Qu.: 4.000   1st Qu.: 8.00  
##  Median :2.009e+11   Median :2009   Median : 6.000   Median :15.00  
##  Mean   :2.003e+11   Mean   :2003   Mean   : 6.467   Mean   :15.51  
##  3rd Qu.:2.014e+11   3rd Qu.:2014   3rd Qu.: 9.000   3rd Qu.:23.00  
##  Max.   :2.017e+11   Max.   :2017   Max.   :12.000   Max.   :31.00  
##                                                                     
##   approxdate           extended        resolution           country    
##  Length:181691      Min.   :0.00000   Length:181691      Min.   :   4  
##  Class :character   1st Qu.:0.00000   Class :character   1st Qu.:  78  
##  Mode  :character   Median :0.00000   Mode  :character   Median :  98  
##                     Mean   :0.04535                      Mean   : 132  
##                     3rd Qu.:0.00000                      3rd Qu.: 160  
##                     Max.   :1.00000                      Max.   :1004  
##                                                                        
##  country_txt            region        region_txt         provstate        
##  Length:181691      Min.   : 1.000   Length:181691      Length:181691     
##  Class :character   1st Qu.: 5.000   Class :character   Class :character  
##  Mode  :character   Median : 6.000   Mode  :character   Mode  :character  
##                     Mean   : 7.161                                        
##                     3rd Qu.:10.000                                        
##                     Max.   :12.000                                        
##                                                                           
##      city              latitude        longitude          specificity   
##  Length:181691      Min.   :-53.16   Min.   :-86185896   Min.   :1.000  
##  Class :character   1st Qu.: 11.51   1st Qu.:        5   1st Qu.:1.000  
##  Mode  :character   Median : 31.47   Median :       43   Median :1.000  
##                     Mean   : 23.50   Mean   :     -459   Mean   :1.451  
##                     3rd Qu.: 34.69   3rd Qu.:       69   3rd Qu.:1.000  
##                     Max.   : 74.63   Max.   :      179   Max.   :5.000  
##                     NA's   :4556     NA's   :4557        NA's   :6      
##     vicinity         location           summary              crit1       
##  Min.   :-9.0000   Length:181691      Length:181691      Min.   :0.0000  
##  1st Qu.: 0.0000   Class :character   Class :character   1st Qu.:1.0000  
##  Median : 0.0000   Mode  :character   Mode  :character   Median :1.0000  
##  Mean   : 0.0683                                         Mean   :0.9885  
##  3rd Qu.: 0.0000                                         3rd Qu.:1.0000  
##  Max.   : 1.0000                                         Max.   :1.0000  
##                                                                          
##      crit2            crit3          doubtterr        alternative    
##  Min.   :0.0000   Min.   :0.0000   Min.   :-9.0000   Min.   :1.00    
##  1st Qu.:1.0000   1st Qu.:1.0000   1st Qu.: 0.0000   1st Qu.:1.00    
##  Median :1.0000   Median :1.0000   Median : 0.0000   Median :1.00    
##  Mean   :0.9931   Mean   :0.8757   Mean   :-0.5232   Mean   :1.29    
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.: 0.0000   3rd Qu.:1.00    
##  Max.   :1.0000   Max.   :1.0000   Max.   : 1.0000   Max.   :5.00    
##                                    NA's   :1         NA's   :152680  
##  alternative_txt       multiple         success          suicide       
##  Length:181691      Min.   :0.0000   Min.   :0.0000   Min.   :0.00000  
##  Class :character   1st Qu.:0.0000   1st Qu.:1.0000   1st Qu.:0.00000  
##  Mode  :character   Median :0.0000   Median :1.0000   Median :0.00000  
##                     Mean   :0.1378   Mean   :0.8896   Mean   :0.03651  
##                     3rd Qu.:0.0000   3rd Qu.:1.0000   3rd Qu.:0.00000  
##                     Max.   :1.0000   Max.   :1.0000   Max.   :1.00000  
##                     NA's   :1                                          
##   attacktype1    attacktype1_txt     attacktype2     attacktype2_txt   
##  Min.   :1.000   Length:181691      Min.   :1.00     Length:181691     
##  1st Qu.:2.000   Class :character   1st Qu.:2.00     Class :character  
##  Median :3.000   Mode  :character   Median :2.00     Mode  :character  
##  Mean   :3.248                      Mean   :3.72                       
##  3rd Qu.:3.000                      3rd Qu.:7.00                       
##  Max.   :9.000                      Max.   :9.00                       
##                                     NA's   :175377                     
##   attacktype3     attacktype3_txt      targtype1     targtype1_txt     
##  Min.   :1.00     Length:181691      Min.   : 1.00   Length:181691     
##  1st Qu.:2.00     Class :character   1st Qu.: 3.00   Class :character  
##  Median :7.00     Mode  :character   Median : 4.00   Mode  :character  
##  Mean   :5.25                        Mean   : 8.44                     
##  3rd Qu.:7.00                        3rd Qu.:14.00                     
##  Max.   :8.00                        Max.   :22.00                     
##  NA's   :181263                                                        
##   targsubtype1    targsubtype1_txt      corp1             target1         
##  Min.   :  1.00   Length:181691      Length:181691      Length:181691     
##  1st Qu.: 22.00   Class :character   Class :character   Class :character  
##  Median : 35.00   Mode  :character   Mode  :character   Mode  :character  
##  Mean   : 46.97                                                           
##  3rd Qu.: 74.00                                                           
##  Max.   :113.00                                                           
##  NA's   :10373                                                            
##     natlty1       natlty1_txt          targtype2      targtype2_txt     
##  Min.   :   4.0   Length:181691      Min.   : 1.00    Length:181691     
##  1st Qu.:  83.0   Class :character   1st Qu.: 4.00    Class :character  
##  Median : 101.0   Mode  :character   Median :14.00    Mode  :character  
##  Mean   : 127.7                      Mean   :10.25                      
##  3rd Qu.: 173.0                      3rd Qu.:14.00                      
##  Max.   :1004.0                      Max.   :22.00                      
##  NA's   :1559                        NA's   :170547                     
##   targsubtype2    targsubtype2_txt      corp2             target2         
##  Min.   :  1.00   Length:181691      Length:181691      Length:181691     
##  1st Qu.: 34.00   Class :character   Class :character   Class :character  
##  Median : 67.00   Mode  :character   Mode  :character   Mode  :character  
##  Mean   : 55.31                                                           
##  3rd Qu.: 69.00                                                           
##  Max.   :113.00                                                           
##  NA's   :171006                                                           
##     natlty2       natlty2_txt          targtype3      targtype3_txt     
##  Min.   :   4.0   Length:181691      Min.   : 1.00    Length:181691     
##  1st Qu.:  92.0   Class :character   1st Qu.: 3.00    Class :character  
##  Median :  98.0   Mode  :character   Median :14.00    Mode  :character  
##  Mean   : 131.2                      Mean   :10.02                      
##  3rd Qu.: 182.0                      3rd Qu.:14.00                      
##  Max.   :1004.0                      Max.   :22.00                      
##  NA's   :170863                      NA's   :180515                     
##   targsubtype3    targsubtype3_txt      corp3             target3         
##  Min.   :  1.00   Length:181691      Length:181691      Length:181691     
##  1st Qu.: 33.00   Class :character   Class :character   Class :character  
##  Median : 67.00   Mode  :character   Mode  :character   Mode  :character  
##  Mean   : 55.55                                                           
##  3rd Qu.: 73.00                                                           
##  Max.   :113.00                                                           
##  NA's   :180594                                                           
##     natlty3       natlty3_txt           gname             gsubname        
##  Min.   :   4.0   Length:181691      Length:181691      Length:181691     
##  1st Qu.:  75.0   Class :character   Class :character   Class :character  
##  Median : 110.0   Mode  :character   Mode  :character   Mode  :character  
##  Mean   : 144.6                                                           
##  3rd Qu.: 182.0                                                           
##  Max.   :1004.0                                                           
##  NA's   :180544                                                           
##     gname2           gsubname2            gname3           gsubname3        
##  Length:181691      Length:181691      Length:181691      Length:181691     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##     motive           guncertain1      guncertain2      guncertain3    
##  Length:181691      Min.   :0.0000   Min.   :0.00     Min.   :0.00    
##  Class :character   1st Qu.:0.0000   1st Qu.:0.00     1st Qu.:0.00    
##  Mode  :character   Median :0.0000   Median :0.00     Median :0.00    
##                     Mean   :0.0814   Mean   :0.27     Mean   :0.19    
##                     3rd Qu.:0.0000   3rd Qu.:1.00     3rd Qu.:0.00    
##                     Max.   :1.0000   Max.   :1.00     Max.   :1.00    
##                     NA's   :380      NA's   :179736   NA's   :181371  
##    individual          nperps            nperpcap         claimed     
##  Min.   :0.00000   Min.   :  -99.00   Min.   :-99.00   Min.   :-9.00  
##  1st Qu.:0.00000   1st Qu.:  -99.00   1st Qu.:  0.00   1st Qu.: 0.00  
##  Median :0.00000   Median :  -99.00   Median :  0.00   Median : 0.00  
##  Mean   :0.00295   Mean   :  -65.36   Mean   : -1.52   Mean   : 0.05  
##  3rd Qu.:0.00000   3rd Qu.:    1.00   3rd Qu.:  0.00   3rd Qu.: 0.00  
##  Max.   :1.00000   Max.   :25000.00   Max.   :406.00   Max.   : 1.00  
##                    NA's   :71115      NA's   :69489    NA's   :66120  
##    claimmode      claimmode_txt          claim2         claimmode2    
##  Min.   : 1.00    Length:181691      Min.   :-9.00    Min.   : 1.00   
##  1st Qu.: 6.00    Class :character   1st Qu.: 0.00    1st Qu.: 6.00   
##  Median : 8.00    Mode  :character   Median : 0.00    Median : 7.00   
##  Mean   : 7.02                       Mean   : 0.25    Mean   : 7.18   
##  3rd Qu.: 8.00                       3rd Qu.: 1.00    3rd Qu.:10.00   
##  Max.   :10.00                       Max.   : 1.00    Max.   :10.00   
##  NA's   :162608                      NA's   :179801   NA's   :181075  
##  claimmode2_txt         claim3         claimmode3     claimmode3_txt    
##  Length:181691      Min.   :0.00     Min.   : 1.00    Length:181691     
##  Class :character   1st Qu.:0.00     1st Qu.: 4.00    Class :character  
##  Mode  :character   Median :0.00     Median : 7.00    Mode  :character  
##                     Mean   :0.41     Mean   : 6.73                      
##                     3rd Qu.:1.00     3rd Qu.: 9.00                      
##                     Max.   :1.00     Max.   :10.00                      
##                     NA's   :181373   NA's   :181558                     
##    compclaim        weaptype1      weaptype1_txt       weapsubtype1  
##  Min.   :-9.0     Min.   : 1.000   Length:181691      Min.   : 1.00  
##  1st Qu.:-9.0     1st Qu.: 5.000   Class :character   1st Qu.: 5.00  
##  Median :-9.0     Median : 6.000   Mode  :character   Median :12.00  
##  Mean   :-6.3     Mean   : 6.447                      Mean   :11.12  
##  3rd Qu.: 0.0     3rd Qu.: 6.000                      3rd Qu.:16.00  
##  Max.   : 1.0     Max.   :13.000                      Max.   :31.00  
##  NA's   :176852                                       NA's   :20768  
##  weapsubtype1_txt     weaptype2      weaptype2_txt       weapsubtype2   
##  Length:181691      Min.   : 1.00    Length:181691      Min.   : 1.00   
##  Class :character   1st Qu.: 5.00    Class :character   1st Qu.: 5.00   
##  Mode  :character   Median : 6.00    Mode  :character   Median : 7.00   
##                     Mean   : 6.81                       Mean   :10.75   
##                     3rd Qu.: 8.00                       3rd Qu.:18.00   
##                     Max.   :13.00                       Max.   :31.00   
##                     NA's   :168564                      NA's   :170149  
##  weapsubtype2_txt     weaptype3      weaptype3_txt       weapsubtype3   
##  Length:181691      Min.   : 2.00    Length:181691      Min.   : 1.00   
##  Class :character   1st Qu.: 5.00    Class :character   1st Qu.: 4.00   
##  Mode  :character   Median : 6.00    Mode  :character   Median : 7.00   
##                     Mean   : 6.91                       Mean   :11.64   
##                     3rd Qu.: 9.00                       3rd Qu.:20.00   
##                     Max.   :13.00                       Max.   :28.00   
##                     NA's   :179828                      NA's   :179998  
##  weapsubtype3_txt     weaptype4      weaptype4_txt       weapsubtype4   
##  Length:181691      Min.   : 5.00    Length:181691      Min.   : 2.00   
##  Class :character   1st Qu.: 5.00    Class :character   1st Qu.: 3.00   
##  Mode  :character   Median : 6.00    Mode  :character   Median : 9.50   
##                     Mean   : 6.25                       Mean   :10.84   
##                     3rd Qu.: 6.00                       3rd Qu.:16.00   
##                     Max.   :12.00                       Max.   :28.00   
##                     NA's   :181618                      NA's   :181621  
##  weapsubtype4_txt    weapdetail            nkill             nkillus       
##  Length:181691      Length:181691      Min.   :   0.000   Min.   :   0.00  
##  Class :character   Class :character   1st Qu.:   0.000   1st Qu.:   0.00  
##  Mode  :character   Mode  :character   Median :   0.000   Median :   0.00  
##                                        Mean   :   2.403   Mean   :   0.05  
##                                        3rd Qu.:   2.000   3rd Qu.:   0.00  
##                                        Max.   :1570.000   Max.   :1360.00  
##                                        NA's   :10313      NA's   :64446    
##     nkillter          nwound            nwoundus         nwoundte     
##  Min.   :  0.00   Min.   :   0.000   Min.   :  0.00   Min.   :  0.00  
##  1st Qu.:  0.00   1st Qu.:   0.000   1st Qu.:  0.00   1st Qu.:  0.00  
##  Median :  0.00   Median :   0.000   Median :  0.00   Median :  0.00  
##  Mean   :  0.51   Mean   :   3.168   Mean   :  0.04   Mean   :  0.11  
##  3rd Qu.:  0.00   3rd Qu.:   2.000   3rd Qu.:  0.00   3rd Qu.:  0.00  
##  Max.   :500.00   Max.   :8191.000   Max.   :751.00   Max.   :200.00  
##  NA's   :66958    NA's   :16311      NA's   :64702    NA's   :69143   
##     property         propextent     propextent_txt       propvalue         
##  Min.   :-9.0000   Min.   :1.0      Length:181691      Min.   :       -99  
##  1st Qu.: 0.0000   1st Qu.:3.0      Class :character   1st Qu.:       -99  
##  Median : 1.0000   Median :3.0      Mode  :character   Median :       -99  
##  Mean   :-0.5446   Mean   :3.3                         Mean   :    208812  
##  3rd Qu.: 1.0000   3rd Qu.:4.0                         3rd Qu.:      1000  
##  Max.   : 1.0000   Max.   :4.0                         Max.   :2700000000  
##                    NA's   :117626                      NA's   :142702      
##  propcomment          ishostkid           nhostkid          nhostkidus    
##  Length:181691      Min.   :-9.00000   Min.   :  -99.00   Min.   :-99.00  
##  Class :character   1st Qu.: 0.00000   1st Qu.:    1.00   1st Qu.:  0.00  
##  Mode  :character   Median : 0.00000   Median :    2.00   Median :  0.00  
##                     Mean   : 0.05905   Mean   :    4.53   Mean   : -0.35  
##                     3rd Qu.: 0.00000   3rd Qu.:    4.00   3rd Qu.:  0.00  
##                     Max.   : 1.00000   Max.   :17000.00   Max.   : 86.00  
##                     NA's   :178        NA's   :168119     NA's   :168174  
##      nhours           ndays            divert          kidhijcountry     
##  Min.   :-99.00   Min.   : -99.00   Length:181691      Length:181691     
##  1st Qu.:-99.00   1st Qu.: -99.00   Class :character   Class :character  
##  Median :-99.00   Median : -99.00   Mode  :character   Mode  :character  
##  Mean   :-46.79   Mean   : -32.52                                        
##  3rd Qu.:  0.00   3rd Qu.:   4.00                                        
##  Max.   :999.00   Max.   :2454.00                                        
##  NA's   :177628   NA's   :173567                                         
##      ransom         ransomamt           ransomamtus          ransompaid       
##  Min.   :-9.00    Min.   :       -99   Min.   :      -99   Min.   :      -99  
##  1st Qu.: 0.00    1st Qu.:         0   1st Qu.:        0   1st Qu.:      -99  
##  Median : 0.00    Median :     15000   Median :        0   Median :        0  
##  Mean   :-0.15    Mean   :   3172530   Mean   :   578487   Mean   :   717944  
##  3rd Qu.: 0.00    3rd Qu.:    400000   3rd Qu.:        0   3rd Qu.:     1273  
##  Max.   : 1.00    Max.   :1000000000   Max.   :132000000   Max.   :275000000  
##  NA's   :104310   NA's   :180341       NA's   :181128      NA's   :180917     
##   ransompaidus      ransomnote        hostkidoutcome   hostkidoutcome_txt
##  Min.   :  -99.0   Length:181691      Min.   :1.00     Length:181691     
##  1st Qu.:    0.0   Class :character   1st Qu.:2.00     Class :character  
##  Median :    0.0   Mode  :character   Median :4.00     Mode  :character  
##  Mean   :  240.4                      Mean   :4.63                       
##  3rd Qu.:    0.0                      3rd Qu.:7.00                       
##  Max.   :48000.0                      Max.   :7.00                       
##  NA's   :181139                       NA's   :170700                     
##    nreleased         addnotes            scite1             scite2         
##  Min.   : -99.00   Length:181691      Length:181691      Length:181691     
##  1st Qu.: -99.00   Class :character   Class :character   Class :character  
##  Median :   0.00   Mode  :character   Mode  :character   Mode  :character  
##  Mean   : -29.02                                                           
##  3rd Qu.:   1.00                                                           
##  Max.   :2769.00                                                           
##  NA's   :171291                                                            
##     scite3            dbsource            INT_LOG          INT_IDEO     
##  Length:181691      Length:181691      Min.   :-9.000   Min.   :-9.000  
##  Class :character   Class :character   1st Qu.:-9.000   1st Qu.:-9.000  
##  Mode  :character   Mode  :character   Median :-9.000   Median :-9.000  
##                                        Mean   :-4.544   Mean   :-4.464  
##                                        3rd Qu.: 0.000   3rd Qu.: 0.000  
##                                        Max.   : 1.000   Max.   : 1.000  
##                                                                         
##     INT_MISC           INT_ANY         related         
##  Min.   :-9.00000   Min.   :-9.000   Length:181691     
##  1st Qu.: 0.00000   1st Qu.:-9.000   Class :character  
##  Median : 0.00000   Median : 0.000   Mode  :character  
##  Mean   : 0.09001   Mean   :-3.946                     
##  3rd Qu.: 0.00000   3rd Qu.: 0.000                     
##  Max.   : 1.00000   Max.   : 1.000                     
## 
# Number of missing (NA) values in each column of the raw import.
vapply(raw_data, function(column) sum(is.na(column)), integer(1))
##            eventid              iyear             imonth               iday 
##                  0                  0                  0                  0 
##         approxdate           extended         resolution            country 
##                  0                  0                  0                  0 
##        country_txt             region         region_txt          provstate 
##                  0                  0                  0                  0 
##               city           latitude          longitude        specificity 
##                  0               4556               4557                  6 
##           vicinity           location            summary              crit1 
##                  0                  0                  0                  0 
##              crit2              crit3          doubtterr        alternative 
##                  0                  0                  1             152680 
##    alternative_txt           multiple            success            suicide 
##                  0                  1                  0                  0 
##        attacktype1    attacktype1_txt        attacktype2    attacktype2_txt 
##                  0                  0             175377                  0 
##        attacktype3    attacktype3_txt          targtype1      targtype1_txt 
##             181263                  0                  0                  0 
##       targsubtype1   targsubtype1_txt              corp1            target1 
##              10373                  0                 33                  1 
##            natlty1        natlty1_txt          targtype2      targtype2_txt 
##               1559                  0             170547                  0 
##       targsubtype2   targsubtype2_txt              corp2            target2 
##             171006                  0                  0                  0 
##            natlty2        natlty2_txt          targtype3      targtype3_txt 
##             170863                  0             180515                  0 
##       targsubtype3   targsubtype3_txt              corp3            target3 
##             180594                  0                  0                  0 
##            natlty3        natlty3_txt              gname           gsubname 
##             180544                  0                  0                  0 
##             gname2          gsubname2             gname3          gsubname3 
##                  0                  0                  0                  0 
##             motive        guncertain1        guncertain2        guncertain3 
##                  0                380             179736             181371 
##         individual             nperps           nperpcap            claimed 
##                  0              71115              69489              66120 
##          claimmode      claimmode_txt             claim2         claimmode2 
##             162608                  0             179801             181075 
##     claimmode2_txt             claim3         claimmode3     claimmode3_txt 
##                  0             181373             181558                  0 
##          compclaim          weaptype1      weaptype1_txt       weapsubtype1 
##             176852                  0                  0              20768 
##   weapsubtype1_txt          weaptype2      weaptype2_txt       weapsubtype2 
##                  0             168564                  0             170149 
##   weapsubtype2_txt          weaptype3      weaptype3_txt       weapsubtype3 
##                  0             179828                  0             179998 
##   weapsubtype3_txt          weaptype4      weaptype4_txt       weapsubtype4 
##                  0             181618                  0             181621 
##   weapsubtype4_txt         weapdetail              nkill            nkillus 
##                  0                  0              10313              64446 
##           nkillter             nwound           nwoundus           nwoundte 
##              66958              16311              64702              69143 
##           property         propextent     propextent_txt          propvalue 
##                  0             117626                  0             142702 
##        propcomment          ishostkid           nhostkid         nhostkidus 
##                  0                178             168119             168174 
##             nhours              ndays             divert      kidhijcountry 
##             177628             173567                  0                  0 
##             ransom          ransomamt        ransomamtus         ransompaid 
##             104310             180341             181128             180917 
##       ransompaidus         ransomnote     hostkidoutcome hostkidoutcome_txt 
##             181139                  0             170700                  0 
##          nreleased           addnotes             scite1             scite2 
##             171291                  0                  0                  0 
##             scite3           dbsource            INT_LOG           INT_IDEO 
##                  0                  0                  0                  0 
##           INT_MISC            INT_ANY            related 
##                  0                  0                  0
# Overwrite every NA in the working copy with 0, across all columns at once
# (in character columns the replacement is stored as the string "0").
# NOTE(review): this treats "unknown" as zero, e.g. for casualty counts and
# coordinates -- confirm that is intended for downstream analyses.
data2 <- replace(data2, is.na(data2), 0)

# Re-count missing values per column to confirm that none remain.
vapply(data2, function(column) sum(is.na(column)), integer(1))
##            eventid              iyear             imonth               iday 
##                  0                  0                  0                  0 
##         approxdate           extended         resolution            country 
##                  0                  0                  0                  0 
##        country_txt             region         region_txt          provstate 
##                  0                  0                  0                  0 
##               city           latitude          longitude        specificity 
##                  0                  0                  0                  0 
##           vicinity           location            summary              crit1 
##                  0                  0                  0                  0 
##              crit2              crit3          doubtterr        alternative 
##                  0                  0                  0                  0 
##    alternative_txt           multiple            success            suicide 
##                  0                  0                  0                  0 
##        attacktype1    attacktype1_txt        attacktype2    attacktype2_txt 
##                  0                  0                  0                  0 
##        attacktype3    attacktype3_txt          targtype1      targtype1_txt 
##                  0                  0                  0                  0 
##       targsubtype1   targsubtype1_txt              corp1            target1 
##                  0                  0                  0                  0 
##            natlty1        natlty1_txt          targtype2      targtype2_txt 
##                  0                  0                  0                  0 
##       targsubtype2   targsubtype2_txt              corp2            target2 
##                  0                  0                  0                  0 
##            natlty2        natlty2_txt          targtype3      targtype3_txt 
##                  0                  0                  0                  0 
##       targsubtype3   targsubtype3_txt              corp3            target3 
##                  0                  0                  0                  0 
##            natlty3        natlty3_txt              gname           gsubname 
##                  0                  0                  0                  0 
##             gname2          gsubname2             gname3          gsubname3 
##                  0                  0                  0                  0 
##             motive        guncertain1        guncertain2        guncertain3 
##                  0                  0                  0                  0 
##         individual             nperps           nperpcap            claimed 
##                  0                  0                  0                  0 
##          claimmode      claimmode_txt             claim2         claimmode2 
##                  0                  0                  0                  0 
##     claimmode2_txt             claim3         claimmode3     claimmode3_txt 
##                  0                  0                  0                  0 
##          compclaim          weaptype1      weaptype1_txt       weapsubtype1 
##                  0                  0                  0                  0 
##   weapsubtype1_txt          weaptype2      weaptype2_txt       weapsubtype2 
##                  0                  0                  0                  0 
##   weapsubtype2_txt          weaptype3      weaptype3_txt       weapsubtype3 
##                  0                  0                  0                  0 
##   weapsubtype3_txt          weaptype4      weaptype4_txt       weapsubtype4 
##                  0                  0                  0                  0 
##   weapsubtype4_txt         weapdetail              nkill            nkillus 
##                  0                  0                  0                  0 
##           nkillter             nwound           nwoundus           nwoundte 
##                  0                  0                  0                  0 
##           property         propextent     propextent_txt          propvalue 
##                  0                  0                  0                  0 
##        propcomment          ishostkid           nhostkid         nhostkidus 
##                  0                  0                  0                  0 
##             nhours              ndays             divert      kidhijcountry 
##                  0                  0                  0                  0 
##             ransom          ransomamt        ransomamtus         ransompaid 
##                  0                  0                  0                  0 
##       ransompaidus         ransomnote     hostkidoutcome hostkidoutcome_txt 
##                  0                  0                  0                  0 
##          nreleased           addnotes             scite1             scite2 
##                  0                  0                  0                  0 
##             scite3           dbsource            INT_LOG           INT_IDEO 
##                  0                  0                  0                  0 
##           INT_MISC            INT_ANY            related 
##                  0                  0                  0
# Bucket every incident by its death toll (nkill):
#   1 = fewer than 3 killed, 2 = 3 to 9 killed, 3 = 10 or more killed.
data2 <- data2 %>%
  mutate(
    attack_var = case_when(
      nkill >= 10 ~ 3,
      nkill >= 3 ~ 2,
      nkill < 3 ~ 1
    )
  )

# One data frame per severity bucket.
df1 <- data2[data2$attack_var == 1, ]
df2 <- data2[data2$attack_var == 2, ]
df3 <- data2[data2$attack_var == 3, ]

# Count the incidents in `events` for each year in `years`.
#
# events: data frame with an `iyear` column (year of the incident).
# years:  the years to report, in output order.
#
# Returns a data frame with one row per requested year and the columns
# `year` and `attacks`. Years without any incident are kept with a count of 0,
# so every bucket yields the same set of rows. Tabulating once replaces the
# previous approach of filtering the data and appending a row for every year.
count_attacks_by_year <- function(events, years = 1970:2017) {
  data.frame(
    year = as.numeric(years),
    attacks = as.integer(table(factor(events$iyear, levels = years)))
  )
}

df1_num <- count_attacks_by_year(df1)
df2_num <- count_attacks_by_year(df2)
df3_num <- count_attacks_by_year(df3)
library(ggplot2)
# Yearly incident counts, one scatter plot per severity bucket.
ggplot(df1_num, aes(x = year, y = attacks)) +
  geom_point() +
  labs(title = "Minor attacks") +
  theme(text = element_text(size = 16))

ggplot(df2_num, aes(x = year, y = attacks)) +
  geom_point() +
  labs(title = "Mid-sized attacks") +
  theme(text = element_text(size = 16))

ggplot(df3_num, aes(x = year, y = attacks)) +
  geom_point() +
  labs(title = "Major attacks") +
  theme(text = element_text(size = 16))

# Linear trend of the yearly counts in df1_num (incidents with nkill < 3).
m1 <- lm(formula = attacks ~ year, data = df1_num)
summary(m1)
## 
## Call:
## lm(formula = attacks ~ year, data = df1_num)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3812.6 -1227.2   426.5   938.1  7588.4 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -278182.47   49616.07  -5.607 1.12e-06 ***
## year            141.10      24.89   5.669 9.03e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2389 on 46 degrees of freedom
## Multiple R-squared:  0.4113, Adjusted R-squared:  0.3985 
## F-statistic: 32.14 on 1 and 46 DF,  p-value: 9.035e-07
# Standard lm diagnostic plots for m1, arranged on a 2 x 2 grid.
par(mfrow = c(2, 2))
plot(m1)

# Linear trend of the yearly counts in df2_num (incidents with 3 <= nkill < 10).
m2 <- lm(formula = attacks ~ year, data = df2_num)
summary(m2)
## 
## Call:
## lm(formula = attacks ~ year, data = df2_num)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -531.28 -358.27   58.38  144.83 1396.87 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -57296.024   8438.849  -6.790 1.89e-08 ***
## year            28.985      4.233   6.847 1.54e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 406.3 on 46 degrees of freedom
## Multiple R-squared:  0.5048, Adjusted R-squared:  0.494 
## F-statistic: 46.89 on 1 and 46 DF,  p-value: 1.545e-08
# Standard lm diagnostic plots for m2, arranged on a 2 x 2 grid.
par(mfrow = c(2, 2))
plot(m2)

# Linear trend of the yearly counts in df3_num (incidents with nkill >= 10).
m3 <- lm(formula = attacks ~ year, data = df3_num)
summary(m3)
## 
## Call:
## lm(formula = attacks ~ year, data = df3_num)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -214.95 -127.53    6.84   43.26  544.24 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -20066.926   3278.105  -6.122 1.90e-07 ***
## year            10.162      1.644   6.180 1.55e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 157.8 on 46 degrees of freedom
## Multiple R-squared:  0.4536, Adjusted R-squared:  0.4418 
## F-statistic: 38.19 on 1 and 46 DF,  p-value: 1.555e-07
# Standard lm diagnostic plots for m3, arranged on a 2 x 2 grid.
par(mfrow = c(2, 2))
plot(m3)

# Yearly counts per severity bucket with the least-squares line and its
# confidence band drawn on top.
ggplot(df1_num, aes(x = year, y = attacks)) +
  geom_point() +
  labs(title = "Minor attacks (LM)") +
  theme(text = element_text(size = 16)) +
  geom_smooth(method = "lm", formula = y ~ x)

ggplot(df2_num, aes(x = year, y = attacks)) +
  geom_point() +
  labs(title = "Mid sized attacks (LM)") +
  theme(text = element_text(size = 16)) +
  geom_smooth(method = "lm", formula = y ~ x)

ggplot(df3_num, aes(x = year, y = attacks)) +
  geom_point() +
  labs(title = "Major attacks (LM)") +
  theme(text = element_text(size = 16)) +
  geom_smooth(method = "lm", formula = y ~ x)

# All three yearly count series in a single plot.
# The legend labels follow the severity buckets: df1_num counts incidents with
# fewer than 3 deaths, df2_num those with 3 to 9 deaths and df3_num those with
# 10 or more deaths. Each series keeps the colour it was drawn in before; only
# the labels were corrected so that they name the right bucket.
colors <- c(
  "Minor Attacks" = "red",
  "Mid-sized Attacks" = "green",
  "Major Attacks" = "blue"
)
ggplot() +
  geom_point(
    data = df1_num,
    aes(x = year, y = attacks, color = "Minor Attacks"),
    show.legend = TRUE
  ) +
  geom_point(
    data = df2_num,
    aes(x = year, y = attacks, color = "Mid-sized Attacks"),
    show.legend = TRUE
  ) +
  geom_point(
    data = df3_num,
    aes(x = year, y = attacks, color = "Major Attacks"),
    show.legend = TRUE
  ) +
  ggtitle('Three types of terror attacks') +
  labs(color = "Type of Attack") +
  # breaks fixes the legend order to minor, mid-sized, major
  scale_color_manual(values = colors, breaks = names(colors)) +
  scale_x_continuous("Year of Attacks") +
  scale_y_continuous("Number of Attacks")

library(tidyverse)
library(dplyr)
library(ggplot2)
# Keep only incidents that have both a latitude and a longitude.
world_data <- raw_data
has_latitude <- !is.na(world_data$latitude)
world_data1 <- world_data[has_latitude, ]
has_longitude <- !is.na(world_data1$longitude)
world_data2 <- world_data1[has_longitude, ]

# Severity bucket from the death toll: 1 = fewer than 3 killed,
# 2 = 3 to 9 killed, 3 = 10 or more killed (NA where nkill is missing).
world_data2 <- world_data2 %>%
  mutate(
    attack_var = case_when(
      nkill >= 10 ~ 3,
      nkill >= 3 ~ 2,
      nkill < 3 ~ 1
    )
  )

# Replace every remaining NA in the table with 0. This runs after the
# bucketing, so incidents with a missing nkill end up with attack_var 0 and
# fall into none of the three subsets below.
world_data2[is.na(world_data2)] <- 0

# One data frame per severity bucket.
wd1 <- world_data2[world_data2$attack_var == 1, ]
wd2 <- world_data2[world_data2$attack_var == 2, ]
wd3 <- world_data2[world_data2$attack_var == 3, ]

# Country outlines for the base map, and the notebook plot size.
world_coordinates <- map_data("world")
options(repr.plot.width = 50, repr.plot.height = 50)

# World outline with the locations of the incidents in wd1 (fewer than 3 deaths).
ggplot() +
  geom_map(
    data = world_coordinates, map = world_coordinates,
    aes(map_id = region), fill = "grey"
  ) +
  # geom_map() takes no x/y aesthetics, so the axis ranges are taken from the
  # outline coordinates explicitly instead.
  expand_limits(x = world_coordinates$long, y = world_coordinates$lat) +
  # NOTE(review): size used to be mapped to the literal string 'attack_num',
  # which is a constant rather than a column and only produced a one-entry
  # legend. Map size to a real column here if point size should carry
  # information.
  geom_point(
    data = wd1,
    aes(x = longitude, y = latitude)
  )

library(ggmap)
## ℹ Google's Terms of Service: <https://mapsplatform.google.com>
## ℹ Please cite ggmap if you use it! Use `citation("ggmap")` for details.
# Country outline polygons used as the base layer of the maps below;
# the same call also produced world_coordinates above.
world <- map_data("world")

Minor attacks

# Locations of the incidents in df1 (fewer than 3 deaths) on the world outline.
# df1 is the lowest-casualty bucket, so the titles say "Minor".
ggplot() +
  geom_map(
    data = world, map = world,
    aes(map_id = region),
    # linewidth replaces size for line widths since ggplot2 3.4.0
    color = "white", fill = "lightgray", linewidth = 0.1
  ) +
  # geom_map() takes no x/y aesthetics; set the axis ranges from the outline.
  expand_limits(x = world$long, y = world$lat) +
  geom_point(
    data = df1,
    aes(longitude, latitude, color = nkill),
    alpha = 0.5
  ) +
  labs(
    title = "Minor Terror Attack Locations",
    x = NULL, y = NULL, color = NULL
  ) +
  theme_void() +
  theme(legend.position = "none")

# Yearly counts of the same bucket with the fitted linear trend m1 in red.
plot(
  x = df1_num$year, y = df1_num$attacks,
  main = "Minor Terror Attacks",
  xlab = "Year",
  ylab = "Number of Attacks"
)
abline(m1, col = "red")

library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following object is masked from 'package:purrr':
## 
##     some
# Calculating Residuals
residuals <- m1$residuals

# Normality test
# One-sample Kolmogorov-Smirnov test against a normal distribution that has the
# residuals' own spread. Comparing against rnorm(100, 0, 1) tests for a
# *standard* normal instead: it rejects merely because the residuals are on a
# much larger scale (residual standard error 2389), and the random reference
# sample gives a different answer on every run.
# The sd is estimated from the same data, which makes the p-value conservative.
ks.test(residuals, "pnorm", mean = 0, sd = sd(residuals))
## (output not reproduced here; re-run this chunk to regenerate it)
# A small p-value rejects the null hypothesis that the residuals follow a normal distribution
shapiro.test(residuals)               # A small Shapiro-Wilk p-value likewise indicates non-normal residuals
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals
## W = 0.90946, p-value = 0.001288
# Constant Variance Assumption/Cook-Weisberg Test
ncvTest(m1)                         # A small p-value indicates that the constant-variance assumption is violated
## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 24.3818, Df = 1, p = 7.9012e-07
# Residuals against fitted values, to inspect the spread visually
plot(m1$fitted.values, m1$residuals)

# Spread Level Plot
# The returned object carries the suggested power transformation used below.
myspread <- spreadLevelPlot(m1)
## Warning in spreadLevelPlot.lm(m1): 
## 2 negative fitted values removed

myspread
## 
## Suggested power transformation:  0.2399187
# Response (yearly counts) and predictor (year) for the refit
y <- df1_num$attacks
x <- df1_num$year

# Spread Level transformation and New Model
# Raise the counts to the suggested power and refit the linear trend.
z<-y^(myspread$PowerTransformation)
mylm2<-lm(z ~ x)
summary(mylm2)
## 
## Call:
## lm(formula = z ~ x)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.2990 -0.5286  0.1160  0.8684  2.1307 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -124.61586   28.76360  -4.332 7.93e-05 ***
## x              0.06569    0.01443   4.553 3.88e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.385 on 46 degrees of freedom
## Multiple R-squared:  0.3106, Adjusted R-squared:  0.2956 
## F-statistic: 20.73 on 1 and 46 DF,  p-value: 3.876e-05
# Cook-Weisberg Test Again
ncvTest(mylm2)                      # a high value of p indicates constant variance
## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 1.692439, Df = 1, p = 0.19328
plot(mylm2$fitted.values, mylm2$residuals)

# Normality Test
residuals2 <- mylm2$residuals
# Number of residuals, taken from the model instead of a hard-coded 47
# (the fit has 46 residual degrees of freedom plus 2 coefficients = 48 points).
n <- length(residuals2)
sd1 <- sd(residuals2)

# One-sample Kolmogorov-Smirnov test against N(0, sd1). Testing against the
# theoretical distribution instead of a fresh rnorm() draw makes the result
# reproducible. sd1 is estimated from the data, so the p-value is conservative.
ks.test(residuals2, "pnorm", mean = 0, sd = sd1)   # Large value of p: no evidence against normal residuals
## (output not reproduced here; re-run this chunk to regenerate it)

Mid-sized attacks

# Locations of the incidents in df2 (3 to 9 deaths) on the world outline.
# df2 is the middle bucket, so the titles say "Mid-sized".
ggplot() +
  geom_map(
    data = world, map = world,
    aes(map_id = region),
    # linewidth replaces size for line widths since ggplot2 3.4.0
    color = "white", fill = "lightgray", linewidth = 0.1
  ) +
  # geom_map() takes no x/y aesthetics; set the axis ranges from the outline.
  expand_limits(x = world$long, y = world$lat) +
  geom_point(
    data = df2,
    aes(longitude, latitude, color = nkill),
    alpha = 0.5
  ) +
  labs(
    title = "Mid-sized Terror Attack Locations",
    x = NULL, y = NULL, color = NULL
  ) +
  theme_void() +
  theme(legend.position = "none")

# Yearly counts of the same bucket with the fitted linear trend m2 in red.
plot(
  x = df2_num$year, y = df2_num$attacks,
  main = "Mid-sized Terror Attacks",
  xlab = "Year",
  ylab = "Number of Attacks"
)
abline(m2, col = "red")

# Calculating Residuals
residuals <- m2$residuals

# Normality test
# One-sample Kolmogorov-Smirnov test against a normal distribution that has the
# residuals' own spread. Comparing against rnorm(100, 0, 1) tests for a
# *standard* normal instead: it rejects merely because the residuals are on a
# much larger scale (residual standard error 406.3), and the random reference
# sample gives a different answer on every run.
# The sd is estimated from the same data, which makes the p-value conservative.
ks.test(residuals, "pnorm", mean = 0, sd = sd(residuals))
## (output not reproduced here; re-run this chunk to regenerate it)
# A small p-value rejects the null hypothesis that the residuals follow a normal distribution
shapiro.test(residuals)               # A small Shapiro-Wilk p-value likewise indicates non-normal residuals
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals
## W = 0.8801, p-value = 0.0001522
# Constant Variance Assumption/Cook-Weisberg Test
ncvTest(m2)                         # A small p-value indicates that the constant-variance assumption is violated
## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 28.48745, Df = 1, p = 9.4308e-08
# Residuals against fitted values, to inspect the spread visually
plot(m2$fitted.values, m2$residuals)

# Spread Level Plot
# The returned object carries the suggested power transformation used below.
myspread <- spreadLevelPlot(m2)
## Warning in spreadLevelPlot.lm(m2): 
## 7 negative fitted values removed

myspread
## 
## Suggested power transformation:  0.1683115
# Response (yearly counts) and predictor (year) for the refit
y <- df2_num$attacks
x <- df2_num$year

# Spread Level transformation and New Model
# Raise the counts to the suggested power and refit the linear trend.
z<-y^(myspread$PowerTransformation)
mylm2<-lm(z ~ x)
summary(mylm2)
## 
## Call:
## lm(formula = z ~ x)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.51658 -0.28888  0.03398  0.36740  0.51480 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -67.25423   10.14640  -6.628 3.29e-08 ***
## x             0.03501    0.00509   6.878 1.39e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4885 on 46 degrees of freedom
## Multiple R-squared:  0.507,  Adjusted R-squared:  0.4963 
## F-statistic: 47.31 on 1 and 46 DF,  p-value: 1.388e-08
# Cook-Weisberg Test Again
ncvTest(mylm2)                      # a high value of p indicates constant variance
## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 0.2236473, Df = 1, p = 0.63627
plot(mylm2$fitted.values, mylm2$residuals)

# Normality Test
residuals2 <- mylm2$residuals
# Number of residuals, taken from the model instead of a hard-coded 47
# (the fit has 46 residual degrees of freedom plus 2 coefficients = 48 points).
n <- length(residuals2)
sd1 <- sd(residuals2)

# One-sample Kolmogorov-Smirnov test against N(0, sd1). Testing against the
# theoretical distribution instead of a fresh rnorm() draw makes the result
# reproducible. sd1 is estimated from the data, so the p-value is conservative.
ks.test(residuals2, "pnorm", mean = 0, sd = sd1)   # Large value of p: no evidence against normal residuals
## (output not reproduced here; re-run this chunk to regenerate it)

Major attacks

# Locations of the incidents in df3 (10 or more deaths) on the world outline.
# df3 is the highest-casualty bucket, so the titles say "Major".
ggplot() +
  geom_map(
    data = world, map = world,
    aes(map_id = region),
    # linewidth replaces size for line widths since ggplot2 3.4.0
    color = "white", fill = "lightgray", linewidth = 0.1
  ) +
  # geom_map() takes no x/y aesthetics; set the axis ranges from the outline.
  expand_limits(x = world$long, y = world$lat) +
  geom_point(
    data = df3,
    aes(longitude, latitude, color = nkill),
    alpha = 0.5
  ) +
  labs(
    title = "Major Terror Attack Locations",
    x = NULL, y = NULL, color = NULL
  ) +
  theme_void() +
  theme(legend.position = "none")

# Yearly counts of the same bucket with the fitted linear trend m3 in red.
plot(
  x = df3_num$year, y = df3_num$attacks,
  main = "Major Terror Attacks",
  xlab = "Year",
  ylab = "Number of Attacks"
)
abline(m3, col = "red")

# Calculating Residuals
residuals <- m3$residuals

# Normality test
# One-sample Kolmogorov-Smirnov test against a normal distribution that has the
# residuals' own spread. Comparing against rnorm(100, 0, 1) tests for a
# *standard* normal instead: it rejects merely because the residuals are on a
# much larger scale (residual standard error 157.8), and the random reference
# sample gives a different answer on every run.
# The sd is estimated from the same data, which makes the p-value conservative.
ks.test(residuals, "pnorm", mean = 0, sd = sd(residuals))
## (output not reproduced here; re-run this chunk to regenerate it)
# A small p-value rejects the null hypothesis that the residuals follow a normal distribution
shapiro.test(residuals)               # A small Shapiro-Wilk p-value likewise indicates non-normal residuals
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals
## W = 0.85464, p-value = 2.895e-05
# Constant Variance Assumption/Cook-Weisberg Test
ncvTest(m3)                         # A small p-value indicates that the constant-variance assumption is violated
## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 29.20809, Df = 1, p = 6.5007e-08
# Residuals against fitted values, to inspect the spread visually
plot(m3$fitted.values, m3$residuals)

# Spread Level Plot
# The returned object carries the suggested power transformation used below.
myspread <- spreadLevelPlot(m3)
## Warning in spreadLevelPlot.lm(m3): 
## 5 negative fitted values removed

myspread
## 
## Suggested power transformation:  0.2107483
# Response (yearly counts) and predictor (year) for the refit
y <- df3_num$attacks
x <- df3_num$year

# Spread Level transformation and New Model
# Raise the counts to the suggested power and refit the linear trend.
z<-y^(myspread$PowerTransformation)
mylm2<-lm(z ~ x)
summary(mylm2)
## 
## Call:
## lm(formula = z ~ x)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.6487 -0.3364 -0.0645  0.4596  0.9958 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -85.621139  12.558213  -6.818 1.71e-08 ***
## x             0.044290   0.006299   7.031 8.19e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6046 on 46 degrees of freedom
## Multiple R-squared:  0.518,  Adjusted R-squared:  0.5075 
## F-statistic: 49.43 on 1 and 46 DF,  p-value: 8.193e-09
# Cook-Weisberg Test Again
ncvTest(mylm2)                      # a high value of p indicates constant variance
## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 0.7836975, Df = 1, p = 0.37601
plot(mylm2$fitted.values, mylm2$residuals)

# Normality Test
residuals2 <- mylm2$residuals
# Number of residuals, taken from the model instead of a hard-coded 47
# (the fit has 46 residual degrees of freedom plus 2 coefficients = 48 points).
n <- length(residuals2)
sd1 <- sd(residuals2)

# One-sample Kolmogorov-Smirnov test against N(0, sd1). Testing against the
# theoretical distribution instead of a fresh rnorm() draw makes the result
# reproducible. sd1 is estimated from the data, so the p-value is conservative.
ks.test(residuals2, "pnorm", mean = 0, sd = sd1)   # Large value of p: no evidence against normal residuals
## (output not reproduced here; re-run this chunk to regenerate it)